Load Packages
library(tidyverse)
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag(): dplyr, stats
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
source("functions.R")
First, download the data file.
# Fetch the long-format gapminder data and load it into `gapminder`.
# The CSV is cached under data/: it is downloaded only when it is not already
# on disk, and it is read exactly once.
gapminder_url <- paste0(
  "https://raw.githubusercontent.com/swcarpentry/r-novice-gapminder/",
  "gh-pages/_episodes_rmd/data/gapminder-FiveYearData.csv"
)
gapminder_path <- file.path("data", "gapminder-FiveYearData.csv")
# download.file() cannot write into a directory that does not exist yet.
dir.create(
  dirname(gapminder_path),
  showWarnings = FALSE,
  recursive = TRUE
)
if (!file.exists(gapminder_path)) {
  download.file(gapminder_url, destfile = gapminder_path)
}
gapminder <- read.csv(gapminder_path)
# Quick look at the first rows to confirm the columns were parsed as expected.
head(gapminder)
## country year pop continent lifeExp gdpPercap
## 1 Afghanistan 1952 8425333 Asia 28.801 779.4453
## 2 Afghanistan 1957 9240934 Asia 30.332 820.8530
## 3 Afghanistan 1962 10267083 Asia 31.997 853.1007
## 4 Afghanistan 1967 11537966 Asia 34.020 836.1971
## 5 Afghanistan 1972 13079460 Asia 36.088 739.9811
## 6 Afghanistan 1977 14880372 Asia 38.438 786.1134
I wonder how life expectancy has changed over the years.
# Scatter plot of life expectancy against year, one point per data row.
p <- ggplot(gapminder, aes(x = year, y = lifeExp)) +
  geom_point()
# Printing the object draws the plot.
p
Let's see it as an interactive plot.
# Render the static ggplot object `p` as an interactive plotly graphic.
ggplotly(p)
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
If you are repeating yourself in your code, you may be able to solve that problem by making your own function!
# Small numeric sample used to try out se(), the helper loaded from
# functions.R.
# NOTE(review): this global `cars` shadows the built-in datasets::cars.
cars <- c(3, 4, 5, 6, 7, 10)
se(cars)
## [1] 1.013794
dplyr
You will likely want to get subsections of your data frame and/or calculate means of a variable for a certain subsection; dplyr is your friend!
# Reload the data so this section starts from the unmodified table.
gapminder <- read.csv("data/gapminder-FiveYearData.csv")
# Two ways to keep only year, country and gdpPercap:
# 1. name the columns to keep ...
year_country_gdp <- select(gapminder, year, country, gdpPercap)
# 2. ... or name the columns to drop (this result replaces the first one and
#    keeps the remaining columns in their original order).
year_country_gdp <- select(gapminder, -pop, -continent, -lifeExp)
names(year_country_gdp)
## [1] "country" "year" "gdpPercap"
# Step-by-step version: keep the filtered rows in an intermediate data frame
# (`euro`), then select the columns from it.
euro <- filter(gapminder, continent == "Europe")
year_country_gdp_euro <- select(euro, year, country, gdpPercap)
# Piped version: the same two steps chained with %>%, so no intermediate
# object has to be named.
year_country_gdp_euro <- gapminder %>%
  filter(continent == "Europe") %>%
  select(year, country, gdpPercap)
year_country_gdp_euro
## year country gdpPercap
## 1 1952 Albania 1601.0561
## 2 1957 Albania 1942.2842
## 3 1962 Albania 2312.8890
## 4 1967 Albania 2760.1969
## 5 1972 Albania 3313.4222
## 6 1977 Albania 3533.0039
## 7 1982 Albania 3630.8807
## 8 1987 Albania 3738.9327
## 9 1992 Albania 2497.4379
## 10 1997 Albania 3193.0546
## 11 2002 Albania 4604.2117
## 12 2007 Albania 5937.0295
## 13 1952 Austria 6137.0765
## 14 1957 Austria 8842.5980
## 15 1962 Austria 10750.7211
## 16 1967 Austria 12834.6024
## 17 1972 Austria 16661.6256
## 18 1977 Austria 19749.4223
## 19 1982 Austria 21597.0836
## 20 1987 Austria 23687.8261
## 21 1992 Austria 27042.0187
## 22 1997 Austria 29095.9207
## 23 2002 Austria 32417.6077
## 24 2007 Austria 36126.4927
## 25 1952 Belgium 8343.1051
## 26 1957 Belgium 9714.9606
## 27 1962 Belgium 10991.2068
## 28 1967 Belgium 13149.0412
## 29 1972 Belgium 16672.1436
## 30 1977 Belgium 19117.9745
## 31 1982 Belgium 20979.8459
## 32 1987 Belgium 22525.5631
## 33 1992 Belgium 25575.5707
## 34 1997 Belgium 27561.1966
## 35 2002 Belgium 30485.8838
## 36 2007 Belgium 33692.6051
## 37 1952 Bosnia and Herzegovina 973.5332
## 38 1957 Bosnia and Herzegovina 1353.9892
## 39 1962 Bosnia and Herzegovina 1709.6837
## 40 1967 Bosnia and Herzegovina 2172.3524
## 41 1972 Bosnia and Herzegovina 2860.1698
## 42 1977 Bosnia and Herzegovina 3528.4813
## 43 1982 Bosnia and Herzegovina 4126.6132
## 44 1987 Bosnia and Herzegovina 4314.1148
## 45 1992 Bosnia and Herzegovina 2546.7814
## 46 1997 Bosnia and Herzegovina 4766.3559
## 47 2002 Bosnia and Herzegovina 6018.9752
## 48 2007 Bosnia and Herzegovina 7446.2988
## 49 1952 Bulgaria 2444.2866
## 50 1957 Bulgaria 3008.6707
## 51 1962 Bulgaria 4254.3378
## 52 1967 Bulgaria 5577.0028
## 53 1972 Bulgaria 6597.4944
## 54 1977 Bulgaria 7612.2404
## 55 1982 Bulgaria 8224.1916
## 56 1987 Bulgaria 8239.8548
## 57 1992 Bulgaria 6302.6234
## 58 1997 Bulgaria 5970.3888
## 59 2002 Bulgaria 7696.7777
## 60 2007 Bulgaria 10680.7928
## 61 1952 Croatia 3119.2365
## 62 1957 Croatia 4338.2316
## 63 1962 Croatia 5477.8900
## 64 1967 Croatia 6960.2979
## 65 1972 Croatia 9164.0901
## 66 1977 Croatia 11305.3852
## 67 1982 Croatia 13221.8218
## 68 1987 Croatia 13822.5839
## 69 1992 Croatia 8447.7949
## 70 1997 Croatia 9875.6045
## 71 2002 Croatia 11628.3890
## 72 2007 Croatia 14619.2227
## 73 1952 Czech Republic 6876.1403
## 74 1957 Czech Republic 8256.3439
## 75 1962 Czech Republic 10136.8671
## 76 1967 Czech Republic 11399.4449
## 77 1972 Czech Republic 13108.4536
## 78 1977 Czech Republic 14800.1606
## 79 1982 Czech Republic 15377.2285
## 80 1987 Czech Republic 16310.4434
## 81 1992 Czech Republic 14297.0212
## 82 1997 Czech Republic 16048.5142
## 83 2002 Czech Republic 17596.2102
## 84 2007 Czech Republic 22833.3085
## 85 1952 Denmark 9692.3852
## 86 1957 Denmark 11099.6593
## 87 1962 Denmark 13583.3135
## 88 1967 Denmark 15937.2112
## 89 1972 Denmark 18866.2072
## 90 1977 Denmark 20422.9015
## 91 1982 Denmark 21688.0405
## 92 1987 Denmark 25116.1758
## 93 1992 Denmark 26406.7399
## 94 1997 Denmark 29804.3457
## 95 2002 Denmark 32166.5001
## 96 2007 Denmark 35278.4187
## 97 1952 Finland 6424.5191
## 98 1957 Finland 7545.4154
## 99 1962 Finland 9371.8426
## 100 1967 Finland 10921.6363
## 101 1972 Finland 14358.8759
## 102 1977 Finland 15605.4228
## 103 1982 Finland 18533.1576
## 104 1987 Finland 21141.0122
## 105 1992 Finland 20647.1650
## 106 1997 Finland 23723.9502
## 107 2002 Finland 28204.5906
## 108 2007 Finland 33207.0844
## 109 1952 France 7029.8093
## 110 1957 France 8662.8349
## 111 1962 France 10560.4855
## 112 1967 France 12999.9177
## 113 1972 France 16107.1917
## 114 1977 France 18292.6351
## 115 1982 France 20293.8975
## 116 1987 France 22066.4421
## 117 1992 France 24703.7961
## 118 1997 France 25889.7849
## 119 2002 France 28926.0323
## 120 2007 France 30470.0167
## 121 1952 Germany 7144.1144
## 122 1957 Germany 10187.8267
## 123 1962 Germany 12902.4629
## 124 1967 Germany 14745.6256
## 125 1972 Germany 18016.1803
## 126 1977 Germany 20512.9212
## 127 1982 Germany 22031.5327
## 128 1987 Germany 24639.1857
## 129 1992 Germany 26505.3032
## 130 1997 Germany 27788.8842
## 131 2002 Germany 30035.8020
## 132 2007 Germany 32170.3744
## 133 1952 Greece 3530.6901
## 134 1957 Greece 4916.2999
## 135 1962 Greece 6017.1907
## 136 1967 Greece 8513.0970
## 137 1972 Greece 12724.8296
## 138 1977 Greece 14195.5243
## 139 1982 Greece 15268.4209
## 140 1987 Greece 16120.5284
## 141 1992 Greece 17541.4963
## 142 1997 Greece 18747.6981
## 143 2002 Greece 22514.2548
## 144 2007 Greece 27538.4119
## 145 1952 Hungary 5263.6738
## 146 1957 Hungary 6040.1800
## 147 1962 Hungary 7550.3599
## 148 1967 Hungary 9326.6447
## 149 1972 Hungary 10168.6561
## 150 1977 Hungary 11674.8374
## 151 1982 Hungary 12545.9907
## 152 1987 Hungary 12986.4800
## 153 1992 Hungary 10535.6285
## 154 1997 Hungary 11712.7768
## 155 2002 Hungary 14843.9356
## 156 2007 Hungary 18008.9444
## 157 1952 Iceland 7267.6884
## 158 1957 Iceland 9244.0014
## 159 1962 Iceland 10350.1591
## 160 1967 Iceland 13319.8957
## 161 1972 Iceland 15798.0636
## 162 1977 Iceland 19654.9625
## 163 1982 Iceland 23269.6075
## 164 1987 Iceland 26923.2063
## 165 1992 Iceland 25144.3920
## 166 1997 Iceland 28061.0997
## 167 2002 Iceland 31163.2020
## 168 2007 Iceland 36180.7892
## 169 1952 Ireland 5210.2803
## 170 1957 Ireland 5599.0779
## 171 1962 Ireland 6631.5973
## 172 1967 Ireland 7655.5690
## 173 1972 Ireland 9530.7729
## 174 1977 Ireland 11150.9811
## 175 1982 Ireland 12618.3214
## 176 1987 Ireland 13872.8665
## 177 1992 Ireland 17558.8155
## 178 1997 Ireland 24521.9471
## 179 2002 Ireland 34077.0494
## 180 2007 Ireland 40675.9964
## 181 1952 Italy 4931.4042
## 182 1957 Italy 6248.6562
## 183 1962 Italy 8243.5823
## 184 1967 Italy 10022.4013
## 185 1972 Italy 12269.2738
## 186 1977 Italy 14255.9847
## 187 1982 Italy 16537.4835
## 188 1987 Italy 19207.2348
## 189 1992 Italy 22013.6449
## 190 1997 Italy 24675.0245
## 191 2002 Italy 27968.0982
## 192 2007 Italy 28569.7197
## 193 1952 Montenegro 2647.5856
## 194 1957 Montenegro 3682.2599
## 195 1962 Montenegro 4649.5938
## 196 1967 Montenegro 5907.8509
## 197 1972 Montenegro 7778.4140
## 198 1977 Montenegro 9595.9299
## 199 1982 Montenegro 11222.5876
## 200 1987 Montenegro 11732.5102
## 201 1992 Montenegro 7003.3390
## 202 1997 Montenegro 6465.6133
## 203 2002 Montenegro 6557.1943
## 204 2007 Montenegro 9253.8961
## 205 1952 Netherlands 8941.5719
## 206 1957 Netherlands 11276.1934
## 207 1962 Netherlands 12790.8496
## 208 1967 Netherlands 15363.2514
## 209 1972 Netherlands 18794.7457
## 210 1977 Netherlands 21209.0592
## 211 1982 Netherlands 21399.4605
## 212 1987 Netherlands 23651.3236
## 213 1992 Netherlands 26790.9496
## 214 1997 Netherlands 30246.1306
## 215 2002 Netherlands 33724.7578
## 216 2007 Netherlands 36797.9333
## 217 1952 Norway 10095.4217
## 218 1957 Norway 11653.9730
## 219 1962 Norway 13450.4015
## 220 1967 Norway 16361.8765
## 221 1972 Norway 18965.0555
## 222 1977 Norway 23311.3494
## 223 1982 Norway 26298.6353
## 224 1987 Norway 31540.9748
## 225 1992 Norway 33965.6611
## 226 1997 Norway 41283.1643
## 227 2002 Norway 44683.9753
## 228 2007 Norway 49357.1902
## 229 1952 Poland 4029.3297
## 230 1957 Poland 4734.2530
## 231 1962 Poland 5338.7521
## 232 1967 Poland 6557.1528
## 233 1972 Poland 8006.5070
## 234 1977 Poland 9508.1415
## 235 1982 Poland 8451.5310
## 236 1987 Poland 9082.3512
## 237 1992 Poland 7738.8812
## 238 1997 Poland 10159.5837
## 239 2002 Poland 12002.2391
## 240 2007 Poland 15389.9247
## 241 1952 Portugal 3068.3199
## 242 1957 Portugal 3774.5717
## 243 1962 Portugal 4727.9549
## 244 1967 Portugal 6361.5180
## 245 1972 Portugal 9022.2474
## 246 1977 Portugal 10172.4857
## 247 1982 Portugal 11753.8429
## 248 1987 Portugal 13039.3088
## 249 1992 Portugal 16207.2666
## 250 1997 Portugal 17641.0316
## 251 2002 Portugal 19970.9079
## 252 2007 Portugal 20509.6478
## 253 1952 Romania 3144.6132
## 254 1957 Romania 3943.3702
## 255 1962 Romania 4734.9976
## 256 1967 Romania 6470.8665
## 257 1972 Romania 8011.4144
## 258 1977 Romania 9356.3972
## 259 1982 Romania 9605.3141
## 260 1987 Romania 9696.2733
## 261 1992 Romania 6598.4099
## 262 1997 Romania 7346.5476
## 263 2002 Romania 7885.3601
## 264 2007 Romania 10808.4756
## 265 1952 Serbia 3581.4594
## 266 1957 Serbia 4981.0909
## 267 1962 Serbia 6289.6292
## 268 1967 Serbia 7991.7071
## 269 1972 Serbia 10522.0675
## 270 1977 Serbia 12980.6696
## 271 1982 Serbia 15181.0927
## 272 1987 Serbia 15870.8785
## 273 1992 Serbia 9325.0682
## 274 1997 Serbia 7914.3203
## 275 2002 Serbia 7236.0753
## 276 2007 Serbia 9786.5347
## 277 1952 Slovak Republic 5074.6591
## 278 1957 Slovak Republic 6093.2630
## 279 1962 Slovak Republic 7481.1076
## 280 1967 Slovak Republic 8412.9024
## 281 1972 Slovak Republic 9674.1676
## 282 1977 Slovak Republic 10922.6640
## 283 1982 Slovak Republic 11348.5459
## 284 1987 Slovak Republic 12037.2676
## 285 1992 Slovak Republic 9498.4677
## 286 1997 Slovak Republic 12126.2306
## 287 2002 Slovak Republic 13638.7784
## 288 2007 Slovak Republic 18678.3144
## 289 1952 Slovenia 4215.0417
## 290 1957 Slovenia 5862.2766
## 291 1962 Slovenia 7402.3034
## 292 1967 Slovenia 9405.4894
## 293 1972 Slovenia 12383.4862
## 294 1977 Slovenia 15277.0302
## 295 1982 Slovenia 17866.7218
## 296 1987 Slovenia 18678.5349
## 297 1992 Slovenia 14214.7168
## 298 1997 Slovenia 17161.1073
## 299 2002 Slovenia 20660.0194
## 300 2007 Slovenia 25768.2576
## 301 1952 Spain 3834.0347
## 302 1957 Spain 4564.8024
## 303 1962 Spain 5693.8439
## 304 1967 Spain 7993.5123
## 305 1972 Spain 10638.7513
## 306 1977 Spain 13236.9212
## 307 1982 Spain 13926.1700
## 308 1987 Spain 15764.9831
## 309 1992 Spain 18603.0645
## 310 1997 Spain 20445.2990
## 311 2002 Spain 24835.4717
## 312 2007 Spain 28821.0637
## 313 1952 Sweden 8527.8447
## 314 1957 Sweden 9911.8782
## 315 1962 Sweden 12329.4419
## 316 1967 Sweden 15258.2970
## 317 1972 Sweden 17832.0246
## 318 1977 Sweden 18855.7252
## 319 1982 Sweden 20667.3812
## 320 1987 Sweden 23586.9293
## 321 1992 Sweden 23880.0168
## 322 1997 Sweden 25266.5950
## 323 2002 Sweden 29341.6309
## 324 2007 Sweden 33859.7484
## 325 1952 Switzerland 14734.2327
## 326 1957 Switzerland 17909.4897
## 327 1962 Switzerland 20431.0927
## 328 1967 Switzerland 22966.1443
## 329 1972 Switzerland 27195.1130
## 330 1977 Switzerland 26982.2905
## 331 1982 Switzerland 28397.7151
## 332 1987 Switzerland 30281.7046
## 333 1992 Switzerland 31871.5303
## 334 1997 Switzerland 32135.3230
## 335 2002 Switzerland 34480.9577
## 336 2007 Switzerland 37506.4191
## 337 1952 Turkey 1969.1010
## 338 1957 Turkey 2218.7543
## 339 1962 Turkey 2322.8699
## 340 1967 Turkey 2826.3564
## 341 1972 Turkey 3450.6964
## 342 1977 Turkey 4269.1223
## 343 1982 Turkey 4241.3563
## 344 1987 Turkey 5089.0437
## 345 1992 Turkey 5678.3483
## 346 1997 Turkey 6601.4299
## 347 2002 Turkey 6508.0857
## 348 2007 Turkey 8458.2764
## 349 1952 United Kingdom 9979.5085
## 350 1957 United Kingdom 11283.1779
## 351 1962 United Kingdom 12477.1771
## 352 1967 United Kingdom 14142.8509
## 353 1972 United Kingdom 15895.1164
## 354 1977 United Kingdom 17428.7485
## 355 1982 United Kingdom 18232.4245
## 356 1987 United Kingdom 21664.7877
## 357 1992 United Kingdom 22705.0925
## 358 1997 United Kingdom 26074.5314
## 359 2002 United Kingdom 29478.9992
## 360 2007 United Kingdom 33203.2613
# Per-country summary of GDP per capita: the mean and the value returned by
# se() (helper loaded from functions.R) across all rows for that country.
mean_gpd_percountry <- gapminder %>%
  group_by(country) %>%
  summarise(
    mean_gdp = mean(gdpPercap),
    se_gdp = se(gdpPercap)
  )
mean_gpd_percountry
## # A tibble: 142 x 3
## country mean_gdp se_gdp
## <fctr> <dbl> <dbl>
## 1 Afghanistan 802.6746 31.23550
## 2 Albania 3255.3666 344.20223
## 3 Algeria 4426.0260 378.26190
## 4 Angola 3607.1005 336.56641
## 5 Argentina 8955.5538 537.68144
## 6 Australia 19980.5956 2256.11315
## 7 Austria 20411.9163 2787.23968
## 8 Bahrain 18077.6639 1563.29518
## 9 Bangladesh 817.5588 67.86165
## 10 Belgium 19900.7581 2422.32683
## # ... with 132 more rows
Challenge: I want the mean, standard error, and sample size of life expectancy by continent.
# Per-continent summary of life expectancy: mean, se() value and number of
# rows (n()) in each continent.
mean_se_sample <- gapminder %>%
  group_by(continent) %>%
  summarise(
    mean_lifeExp = mean(lifeExp),
    se_lifeExp = se(lifeExp),
    samsize_lifeExp = n()
  )
mean_se_sample
## # A tibble: 5 x 4
## continent mean_lifeExp se_lifeExp samsize_lifeExp
## <fctr> <dbl> <dbl> <int>
## 1 Africa 48.86533 0.3663016 624
## 2 Americas 64.65874 0.5395389 300
## 3 Asia 60.06490 0.5962151 396
## 4 Europe 71.90369 0.2863536 360
## 5 Oceania 74.32621 0.7747759 24
By continent and country
# Same summary, now for every continent/country pair. This overwrites the
# per-continent table stored in `mean_se_sample`.
# summarise() peels off only the last grouping variable, so the result is
# still grouped by continent (see the "Groups:" line in the printed tibble).
mean_se_sample <- gapminder %>%
  group_by(continent, country) %>%
  summarise(
    mean_lifeExp = mean(lifeExp),
    se_lifeExp = se(lifeExp),
    samsize_lifeExp = n()
  )
mean_se_sample
## # A tibble: 142 x 5
## # Groups: continent [?]
## continent country mean_lifeExp se_lifeExp
## <fctr> <fctr> <dbl> <dbl>
## 1 Africa Algeria 59.03017 2.9849208
## 2 Africa Angola 37.88350 1.1562236
## 3 Africa Benin 48.77992 1.7691977
## 4 Africa Botswana 54.59750 1.7116922
## 5 Africa Burkina Faso 44.69400 1.9762099
## 6 Africa Burundi 44.81733 0.9165096
## 7 Africa Cameroon 48.12850 1.5784640
## 8 Africa Central African Republic 43.86692 1.3627459
## 9 Africa Chad 46.77358 1.4110376
## 10 Africa Comoros 52.38175 2.3476081
## # ... with 132 more rows, and 1 more variables: samsize_lifeExp <int>
combining ggplot and dplyr
# Life expectancy over time for the European countries: the filtered data is
# piped straight into ggplot(), drawn as one line per country and split into
# one facet per country.
az_countries <- gapminder %>%
  filter(continent == "Europe") %>%
  ggplot(aes(x = year, y = lifeExp, color = country)) +
  geom_line() +
  facet_wrap(~country)
az_countries
# Name the plot explicitly instead of relying on ggsave()'s default of
# last_plot(), so the saved file cannot silently become a different plot if
# code is added or reordered above.
ggsave("euro_az_countries.png", plot = az_countries)
## Saving 7 x 5 in image
# Export the life-expectancy summary table to disk.
# NOTE(review): write.csv() writes row names as an extra unnamed first column
# by default; pass row.names = FALSE if that column is not wanted.
write.csv(mean_se_sample, "data/mean_se_sample.csv")
tidyr
R likes to have ‘long’ format data, where every row is an observation and a single column holds the observed values while the other columns identify that observation (exceptions apply when you have multiple types of observations). To switch back and forth between ‘wide’ format (how we typically enter data in a spreadsheet) and ‘long’ format, use tidyr.
# Fetch the wide-format gapminder data and load it into `gapminder_wide`.
# The CSV is cached under data/: it is downloaded only when it is not already
# on disk.
gapminder_wide_url <- paste0(
  "https://raw.githubusercontent.com/swcarpentry/r-novice-gapminder/",
  "gh-pages/data/gapminder_wide.csv"
)
gapminder_wide_path <- file.path("data", "gapminder_wide.csv")
# download.file() cannot write into a directory that does not exist yet.
dir.create(
  dirname(gapminder_wide_path),
  showWarnings = FALSE,
  recursive = TRUE
)
if (!file.exists(gapminder_wide_path)) {
  download.file(gapminder_wide_url, destfile = gapminder_wide_path)
}
gapminder_wide <- read.csv(gapminder_wide_path)
# Wide -> long: stack every pop*, lifeExp* and gdpPercap* column into a
# key/value pair. `obstype_year` receives the former column name (e.g.
# "pop_1952") and `obs_values` the cell value; the remaining columns are kept
# as identifiers.
# NOTE(review): gather() is superseded by pivot_longer() in newer tidyr; it
# is kept here to match the tidyr version this document was built with.
gap_long <- gapminder_wide %>%
  gather(
    obstype_year, obs_values,
    starts_with("pop"),
    starts_with("lifeExp"),
    starts_with("gdpPercap")
  )
head(gap_long)
## continent country obstype_year obs_values
## 1 Africa Algeria pop_1952 9279525
## 2 Africa Angola pop_1952 4232095
## 3 Africa Benin pop_1952 1738315
## 4 Africa Botswana pop_1952 442308
## 5 Africa Burkina Faso pop_1952 4469979
## 6 Africa Burundi pop_1952 2445618
Separate the obstype_year column into obs_type and obs_year, then spread the observation types back into their own columns.
# Split "pop_1952"-style keys at the underscore into an observation type and
# a year, then spread the observation types back out so that each of them
# gets its own column again.
gap_normal <- gap_long %>%
  separate(
    obstype_year,
    into = c("obs_type", "obs_year"),
    sep = "_"
  ) %>%
  spread(obs_type, obs_values)
head(gap_normal)
## continent country obs_year gdpPercap lifeExp pop
## 1 Africa Algeria 1952 2449.008 43.077 9279525
## 2 Africa Algeria 1957 3013.976 45.685 10270856
## 3 Africa Algeria 1962 2550.817 48.303 11000948
## 4 Africa Algeria 1967 3246.992 51.407 12760499
## 5 Africa Algeria 1972 4182.664 54.518 14760787
## 6 Africa Algeria 1977 4910.417 58.014 17152804
# Disabled consistency check of the reshaped data against `gapminder`.
# NOTE(review): the arrange() call below refers to `year`, but separate()
# named that column `obs_year` -- presumably why this was commented out;
# confirm before re-enabling.
#gap_normal <- gap_normal %>%
#  arrange(country,continent,year)
#all.equal(gapminder, gap_normal)
head(gap_normal)
## continent country obs_year gdpPercap lifeExp pop
## 1 Africa Algeria 1952 2449.008 43.077 9279525
## 2 Africa Algeria 1957 3013.976 45.685 10270856
## 3 Africa Algeria 1962 2550.817 48.303 11000948
## 4 Africa Algeria 1967 3246.992 51.407 12760499
## 5 Africa Algeria 1972 4182.664 54.518 14760787
## 6 Africa Algeria 1977 4910.417 58.014 17152804